package eu.europeana.cloud.service.dps.storm.topologies.text; import backtype.storm.Config; import backtype.storm.ILocalCluster; import backtype.storm.Testing; import backtype.storm.generated.AlreadyAliveException; import backtype.storm.generated.InvalidTopologyException; import backtype.storm.generated.StormTopology; import backtype.storm.testing.AckTracker; import backtype.storm.testing.CompleteTopologyParam; import backtype.storm.testing.FeederSpout; import backtype.storm.testing.MockedSources; import backtype.storm.testing.TestJob; import backtype.storm.testing.TrackedTopology; import backtype.storm.topology.TopologyBuilder; import com.rits.cloning.Cloner; import eu.europeana.cloud.service.dps.PluginParameterKeys; import eu.europeana.cloud.service.dps.service.zoo.ZookeeperKillService; import eu.europeana.cloud.service.dps.storm.AbstractDpsBolt; import eu.europeana.cloud.service.dps.storm.StormTaskTuple; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.*; import org.apache.commons.io.IOUtils; import org.junit.Ignore; import org.junit.Test; import static org.junit.Assert.assertEquals; import org.junit.runner.RunWith; import static org.mockito.Matchers.anyLong; import static org.mockito.Matchers.anyString; import org.mockito.Mockito; import org.powermock.api.mockito.PowerMockito; import org.powermock.core.classloader.annotations.PowerMockIgnore; import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; /** * Tests for {@link ExtractTextBolt}. 
* <p>
 * Every test runs an {@code ExtractTextBolt} inside a local Storm test cluster and feeds it the
 * tuples built by {@code prepareInputData()}. {@code ZookeeperKillService} is replaced by a mock
 * whose {@code hasKillFlag} always answers {@code false}.
 *
 * @author Pavel Kefurt <Pavel.Kefurt@gmail.com>
 */
@RunWith(PowerMockRunner.class)
@PrepareForTest(AbstractDpsBolt.class)
@PowerMockIgnore({"javax.management.*", "javax.security.*"})
public class ExtractBoltTest
{
    private static final String STORE_STREAM = "storeStream";
    private static final String INFORM_STREAM = "informStream";

    private static final String SPOUT_ID = "testSpout";
    private static final String BOLT_ID = "extractBolt";

    /** Upper bound, in milliseconds, for waiting on the test topology. */
    private static final int TIMEOUT_MS = 60000;

    //classpath locations of the sample files
    private static final String PDF_FILE_PATH = "/rightTestFile.pdf";
    private static final String TXT_FILE_PATH = "/ascii-file.txt";
    private static final String IMG_FILE_PATH = "/Koala.jpg";

    /**
     * Feeds the prepared tuples one by one into a tracked topology and checks that every one of
     * them is acked.
     *
     * @throws Exception if the mock set-up or the cluster run fails
     */
    @Test
    public void acksTest() throws Exception
    {
        mockZookeeperKillService();

        Testing.withTrackedCluster(new TestJob()
        {
            @Override
            public void run(ILocalCluster cluster) throws IOException, AlreadyAliveException, InvalidTopologyException
            {
                AckTracker tracker = new AckTracker();
                FeederSpout spout = new FeederSpout(StormTaskTuple.getFields());
                spout.setAckFailDelegate(tracker);

                //build topology
                TopologyBuilder builder = new TopologyBuilder();
                builder.setSpout(SPOUT_ID, spout);
                builder.setBolt(BOLT_ID, new ExtractTextBolt(INFORM_STREAM, STORE_STREAM))
                        .shuffleGrouping(SPOUT_ID);

                StormTopology topology = builder.createTopology();
                TrackedTopology tracked = Testing.mkTrackedTopology(cluster, topology);

                //topology config
                Config config = new Config();
                config.setNumWorkers(1);

                cluster.submitTopology("testTopology", config, tracked.getTopology());

                //prepare test data
                List<StormTaskTuple> data = prepareInputData();

                for (StormTaskTuple tuple : data)
                {
                    spout.feed(tuple.toStormTuple());
                    //Waits until topology is idle and 'amt' more tuples have been emitted by spouts
                    Testing.trackedWait(tracked, 1, TIMEOUT_MS);   //topology, amt, timeout
                }

                assertEquals(data.size(), tracker.getNumAcks());
            }
        });
    }

    /**
     * Runs the prepared tuples through the topology to completion and checks how many tuples the
     * bolt emitted on the store, inform and notification streams.
     *
     * @throws Exception if the mock set-up or the cluster run fails
     */
    @Test
    public void outputsTest() throws Exception
    {
        mockZookeeperKillService();

        Testing.withLocalCluster(new TestJob()
        {
            @Override
            public void run(ILocalCluster cluster) throws IOException
            {
                //build topology
                TopologyBuilder builder = new TopologyBuilder();
                builder.setSpout(SPOUT_ID, new FeederSpout(StormTaskTuple.getFields()));
                builder.setBolt(BOLT_ID, new ExtractTextBolt(INFORM_STREAM, STORE_STREAM))
                        .shuffleGrouping(SPOUT_ID);

                StormTopology topology = builder.createTopology();

                //topology config
                Config config = new Config();
                config.setNumWorkers(1);
                config.setDebug(true);

                //prepare the mock data
                MockedSources mockedSources = new MockedSources();
                for (StormTaskTuple tuple : prepareInputData())
                {
                    mockedSources.addMockData(SPOUT_ID, tuple.toStormTuple());
                }

                CompleteTopologyParam completeTopology = new CompleteTopologyParam();
                completeTopology.setMockedSources(mockedSources);
                completeTopology.setStormConf(config);
                completeTopology.setTimeoutMs(TIMEOUT_MS);

                Map result = Testing.completeTopology(cluster, topology, completeTopology);

                //prepareInputData() yields 5 inputs x 10 parameter sets = 50 tuples and the three
                //expected counts below add up to 50.
                //NOTE(review): this suggests every input tuple ends up on exactly one of the three
                //streams - confirm against ExtractTextBolt.
                List tuplesForStore = Testing.readTuples(result, BOLT_ID, STORE_STREAM);
                assertEquals(4, tuplesForStore.size());

                List tuplesForInform = Testing.readTuples(result, BOLT_ID, INFORM_STREAM);
                assertEquals(9, tuplesForInform.size());

                List tuplesForNotification = Testing.readTuples(result, BOLT_ID, ExtractTextBolt.NOTIFICATION_STREAM_NAME);
                assertEquals(37, tuplesForNotification.size());
            }
        });
    }

    /**
     * Makes PowerMock hand out a {@link ZookeeperKillService} mock for every intercepted
     * constructor call of that class; the mock's {@code hasKillFlag} always returns {@code false}.
     *
     * @throws Exception if PowerMock cannot register the constructor expectation
     */
    private void mockZookeeperKillService() throws Exception
    {
        ZookeeperKillService zooKillMock = Mockito.mock(ZookeeperKillService.class);
        Mockito.when(zooKillMock.hasKillFlag(anyString(), anyLong())).thenReturn(false);
        PowerMockito.whenNew(ZookeeperKillService.class).withAnyArguments().thenReturn(zooKillMock);
    }

    /**
     * Builds the cross product of all test inputs and all parameter sets.
     * <p>
     * Inputs are the three sample files, an empty stream and {@code null}. Task ids are assigned
     * sequentially starting at 1 and every tuple receives its own deep copy of the parameter map.
     *
     * @return one tuple per (input, parameter set) pair, inputs in the outer loop
     * @throws IOException if a sample file is missing or cannot be read
     */
    private List<StormTaskTuple> prepareInputData() throws IOException
    {
        List<InputStream> inputs = new ArrayList<>();
        for (String path : new String[]{PDF_FILE_PATH, TXT_FILE_PATH, IMG_FILE_PATH})
        {
            inputs.add(loadResource(path));
        }
        inputs.add(resettableStream(new byte[0]));   //empty content
        inputs.add(null);                            //no content at all

        List<Map<String, String>> parameterSets = prepareParameterSets();

        List<StormTaskTuple> tuples = new ArrayList<>();
        Cloner cloner = new Cloner();
        int taskId = 1;
        for (InputStream input : inputs)
        {
            for (Map<String, String> parameters : parameterSets)
            {
                StormTaskTuple tuple = new StormTaskTuple(taskId++, "testTask", "fileUrl", null, cloner.deepClone(parameters));
                tuple.setFileData(input);
                tuples.add(tuple);

                //the same stream instance is handed to the next tuple, so rewind it first
                if (input != null)
                {
                    input.reset();
                }
            }
        }

        return tuples;
    }

    /**
     * Lists the parameter maps every input is combined with.
     *
     * @return ten parameter maps in a fixed order; the last one is empty
     */
    private List<Map<String, String>> prepareParameterSets()
    {
        List<Map<String, String>> sets = new ArrayList<>();

        sets.add(parameters(
                PluginParameterKeys.STORE_EXTRACTED_TEXT, "True",
                PluginParameterKeys.REPRESENTATION_NAME, "pdf"));
        sets.add(parameters(
                PluginParameterKeys.STORE_EXTRACTED_TEXT, "False",
                PluginParameterKeys.REPRESENTATION_NAME, "pdf"));
        //can read everything (e.g. pdf)
        sets.add(parameters(
                PluginParameterKeys.STORE_EXTRACTED_TEXT, "True",
                PluginParameterKeys.REPRESENTATION_NAME, "txt"));
        sets.add(parameters(
                PluginParameterKeys.STORE_EXTRACTED_TEXT, "False",
                PluginParameterKeys.REPRESENTATION_NAME, "txt"));
        //store flag absent
        sets.add(parameters(
                PluginParameterKeys.REPRESENTATION_NAME, "pdf"));
        //store flag is not a boolean literal
        sets.add(parameters(
                PluginParameterKeys.STORE_EXTRACTED_TEXT, "fdsa",
                PluginParameterKeys.REPRESENTATION_NAME, "pdf"));
        //store flag empty
        sets.add(parameters(
                PluginParameterKeys.STORE_EXTRACTED_TEXT, "",
                PluginParameterKeys.REPRESENTATION_NAME, "pdf"));
        //custom representation name with a format mapping
        sets.add(parameters(
                PluginParameterKeys.REPRESENTATION_NAME, "xxx",
                PluginParameterKeys.FILE_FORMATS, "{\"xxx\":\"pdf\"}"));
        //custom representation name with a format mapping and an extractor mapping
        sets.add(parameters(
                PluginParameterKeys.REPRESENTATION_NAME, "xxx",
                PluginParameterKeys.FILE_FORMATS, "{\"xxx\":\"pdf\"}",
                PluginParameterKeys.EXTRACTORS, "{\"pdf\":\"tika_extractor\"}"));
        //no parameters at all
        sets.add(parameters());

        return sets;
    }

    /**
     * Builds a mutable parameter map from alternating keys and values.
     *
     * @param keysAndValues key, value, key, value, ...
     * @return a new map holding the given pairs
     * @throws IllegalArgumentException if a key has no value
     */
    private static Map<String, String> parameters(String... keysAndValues)
    {
        if (keysAndValues.length % 2 != 0)
        {
            throw new IllegalArgumentException("Expected key/value pairs but got " + keysAndValues.length + " strings");
        }

        Map<String, String> map = new HashMap<>();
        for (int i = 0; i < keysAndValues.length; i += 2)
        {
            map.put(keysAndValues[i], keysAndValues[i + 1]);
        }
        return map;
    }

    /**
     * Reads a classpath resource completely into memory.
     *
     * @param path resource path as accepted by {@link Class#getResourceAsStream(String)}
     * @return an in-memory stream over the resource content, marked at its start
     * @throws IOException if the resource does not exist or cannot be read
     */
    private InputStream loadResource(String path) throws IOException
    {
        try (InputStream resource = getClass().getResourceAsStream(path))
        {
            if (resource == null)
            {
                throw new IOException("Test resource not found on classpath: " + path);
            }
            return resettableStream(IOUtils.toByteArray(resource));
        }
    }

    /**
     * Wraps the bytes in a stream that {@code reset()} rewinds to the beginning.
     *
     * @param content bytes to expose
     * @return a stream over {@code content}, marked at position 0
     */
    private static InputStream resettableStream(byte[] content)
    {
        InputStream stream = new ByteArrayInputStream(content);
        stream.mark(0);
        return stream;
    }
}